# This script reproduces Figure 16 and Table 11 #

rm(list=ls())
options(digits=4)
pkg <- c("PanelMatch", "ggplot2", "plm", 
         "matrixStats",
         "foreign", 
         "stringi", "stringdist",
         "DataCombine","lmtest", "multiwayvcov",
         "data.table",
         "dplyr", 
         "stargazer",
         "clubSandwich", "mediation",
         "doBy")
lapply(pkg, require, character.only = TRUE)

# load datasets
proprietary <- read.csv("proprietary.csv")
load("non_p.RData")
source("functions.R")
# merge 
d_sub1 <- merge(d_sub1, proprietary, by = c("county_id", "time"), all.x = T)

# load mini campaigns
load("mini.RData")

d_sub1$province_county <- paste0(d_sub1$province_name, d_sub1$county_name)

d_sub1 <- d_sub1[,c("province_county",  "county_id", "transaction_area",
            "city_id", "month", "city_name", "log_transaction_area", 
            "log_transaction_area_l1", "tour", "tour_l1", "all_tour", "all_tour_l1",
            "log_gdp_l1", "log_gdppc_l1", "log_gdp", "log_gdppc",
            "city_tour", "log_number_total", 
            "log_expd", "log_industrial",
            "log_hos", "log_fia",
            "log_loan", "log_middle_school",
            "log_rev")]
d_sub1 <- d_sub1[order(d_sub1$province_county, d_sub1$month),]

d_sub1$province_county[which(d_sub1$province_county == "四川省市中区" & 
                           d_sub1$city_name == "内江市")] <- "四川省内江市市中区"

d_sub1$province_county[which(d_sub1$province_county == "四川省市中区" & 
                           d_sub1$city_name == "乐山市")] <- "四川省乐山市市中区"

d_sub1$province_county[which(d_sub1$province_county == "山西省城区" & 
                           d_sub1$city_name == "大同市")] <- "山西省大同市城区"

d_sub1$province_county[which(d_sub1$province_county == "山西省城区" & 
                           d_sub1$city_name == "阳泉市")] <- "山西省阳泉市城区"

d_sub1$province_county[which(d_sub1$province_county == "山西省城区" & 
                           d_sub1$city_name == "长治市")] <- "山西省长治市城区"

d_sub1$province_county[which(d_sub1$province_county == "山西省郊区" & 
                           d_sub1$city_name == "阳泉市")] <- "山西省阳泉市郊区"

d_sub1$province_county[which(d_sub1$province_county == "江苏省鼓楼区" & 
                           d_sub1$city_name == "徐州市")] <- "江苏省徐州市鼓楼区"

d_sub1$province_county[which(d_sub1$province_county == "山东省市中区" & 
                           d_sub1$city_name == "济南市")] <- "山东省济南市市中区"
d_sub1$year <- as.numeric(substrRight(as.character(d_sub1$month), 4))

d_sub1$transaction_area <- exp(d_sub1$log_transaction_area)

d_sub1 <- d_sub1 %>% group_by(province_county, year) %>% 
  dplyr::summarize(transaction_area = mean(transaction_area, na.rm = TRUE),
                   log_gdp = mean(log_gdp,na.rm = T),
                   log_gdppc = mean(log_gdppc,na.rm = T),
                   log_fia = mean(log_fia, na.rm = T),
                   log_loan = mean(log_loan, na.rm = T),
                   log_hos = mean(log_hos, na.rm = T),
                   log_industrial = mean(log_industrial, na.rm = T),
                   log_rev = mean(log_rev, na.rm = T),
                   log_expd = mean(log_expd, na.rm = T),
                   log_middle_school = mean(log_middle_school, na.rm = T))
d_sub1$log_transaction_area <- log(d_sub1$transaction_area)

year_d <- merge(d_sub1, year_d, by = c("province_county", "year"), all.y = T)


#############################################
year_d$national <- 0
year_d$national[year_d$year >= 2015] <- 1
year_d$year <- year_d$year - 2011
sum(year_d$mini_campaign_extent * 12) # 437 incidences 
sum(year_d$mini_campaign_extent[year_d$year < 4] * 12) # 168 incidences 
sum(year_d$mini_campaign_extent[year_d$year < 4] * 12)/3 # 56 incidences 
sum(year_d$mini_campaign_extent[year_d$year >= 4] * 12)/3 # 89.67 incidences 


## linear probability model ##
year_d$log_transaction_area_sd <- standardize(year_d$log_transaction_area)
summary(lm(mini_campaign_yes2 ~ national, data = year_d)) # validity check 2.1 percentage point
####

var_names <- c("log_gdp", "log_gdppc", "log_fia", 
               "log_loan", "log_hos", "log_industrial", 
               "log_rev", "log_expd", "log_middle_school")

for (i in 1:length(var_names)) {
  year_d <- missing_indicator(var_names[i],
                              year_d)
}

year_d$unit <- 
  rep(1:length(unique(year_d$province_county)), each = length(unique(year_d$year)))
year_d$unit <- as.numeric(as.character(year_d$unit))

camp_nocon <- plm(formula = lead(mini_campaign_yes2,1) ~
          log_transaction_area_sd,
          index = c("unit", "year"),
          effect = "twoways",
          data = year_d)
vcov_camp_nocon <-
  clubSandwich::vcovCR(camp_nocon, type="CR1S", cluster = year_d$unit)
se_camp_nocon <- coeftest(camp_nocon, vcov_camp_nocon)
reg_results_land <- list("model" = camp_nocon,
                         "se" = se_camp_nocon)

year_d <- slide(year_d, Var = "mini_campaign_yes2", TimeVar = "year",
      GroupVar = "unit", NewVar = "mini_campaign_yes2_f1")

camp_con <- plm(formula = lead(mini_campaign_yes2,1) ~ 
                    log_transaction_area_sd + 
                  log_gdp + 
                  log_gdppc + 
                  log_fia + 
                  log_loan + 
                  log_hos + 
                  log_industrial + 
                  log_rev + 
                  log_expd + 
                  log_middle_school, 
                  index = c("unit", "year"),
                  effect = "twoways",
                  data = year_d)
vcov_camp_con <- 
  clubSandwich::vcovCR(camp_con, type="CR1S", cluster = year_d$unit)
se_camp_con <- coeftest(camp_con, vcov_camp_con)
reg_results_con_land <- list("model" = camp_con, 
                         "se" = se_camp_con)

lower_camp_nocon_log_transaction_area_sd_95 <- se_camp_nocon["log_transaction_area_sd", "Estimate"] - qnorm(.975) * se_camp_nocon["log_transaction_area_sd", "Std. Error"]
upper_camp_nocon_log_transaction_area_sd_95 <- se_camp_nocon["log_transaction_area_sd", "Estimate"] + qnorm(.975) * se_camp_nocon["log_transaction_area_sd", "Std. Error"]

lower_camp_nocon_log_transaction_area_sd_90 <- se_camp_nocon["log_transaction_area_sd", "Estimate"] - qnorm(.95) * se_camp_nocon["log_transaction_area_sd", "Std. Error"]
upper_camp_nocon_log_transaction_area_sd_90 <- se_camp_nocon["log_transaction_area_sd", "Estimate"] + qnorm(.95) * se_camp_nocon["log_transaction_area_sd", "Std. Error"]

est_log_transaction_area_sd <- se_camp_nocon["log_transaction_area_sd", "Estimate"]

lower_camp_con_log_transaction_area_sd_95 <- se_camp_con["log_transaction_area_sd", "Estimate"] - qnorm(.975) * se_camp_con["log_transaction_area_sd", "Std. Error"]
upper_camp_con_log_transaction_area_sd_95 <- se_camp_con["log_transaction_area_sd", "Estimate"] + qnorm(.975) * se_camp_con["log_transaction_area_sd", "Std. Error"]

lower_camp_con_log_transaction_area_sd_90 <- se_camp_con["log_transaction_area_sd", "Estimate"] - qnorm(.95) * se_camp_con["log_transaction_area_sd", "Std. Error"]
upper_camp_con_log_transaction_area_sd_90 <- se_camp_con["log_transaction_area_sd", "Estimate"] + qnorm(.95) * se_camp_con["log_transaction_area_sd", "Std. Error"]

est_log_transaction_area_sd_con <- se_camp_con["log_transaction_area_sd", "Estimate"]

pdf("output/Figure16.pdf")
plot(x = 0, xlab = "", ylab = "Percentage Points", 
     pch = "",
     ylim = c(-1, 6),
     xlim = c(0.95, 1.15),
     xaxt = "n",
     main = "Decline in Land Auctions Predicts \n Increase in Mini-campaigns")
lines(c(1,1), -100*c(lower_camp_nocon_log_transaction_area_sd_95, upper_camp_nocon_log_transaction_area_sd_95))
lines(c(1,1), -100*c(lower_camp_nocon_log_transaction_area_sd_90, upper_camp_nocon_log_transaction_area_sd_90), 
      lwd = 5, col = "grey")
points(1, -100*est_log_transaction_area_sd, pch = 19)
text(1, 0.75, "Without controls")

lines(c(1.1,1.1), -100*c(lower_camp_con_log_transaction_area_sd_95, upper_camp_con_log_transaction_area_sd_95))
lines(c(1.1,1.1), -100*c(lower_camp_con_log_transaction_area_sd_90, upper_camp_con_log_transaction_area_sd_90), 
      lwd = 5, col = "grey")
points(1.1, -100*est_log_transaction_area_sd_con, pch = 19)
text(1.1, 0.75, "With controls")

abline(h = 0, lty = 3)
dev.off()

##############################################################################################
### Making a table ###
sink("output/Table11.tex")
stargazer(reg_results_land$model,
          reg_results_con_land$model,
          font.size = "large", digits = 4, 
          type = "latex",
            dep.var.labels = c("Having Mini-campaign Next Year",
                             "Having Mini-campaign Next Year"),
          column.labels = c("Having Mini-campaign Next Year",
                            "Having Mini-campaign Next Year"),
          dep.var.labels.include = F,
          style = "qje", no.space = FALSE, keep.stat = c("n", "rsq"),
          omit = c("unit", "year",
                   "log_gdp_missing", 
                   "log_gdppc_missing", 
                   "log_fia_missing", 
                   "log_loan_missing", 
                   "log_hos_missing",
                   "log_industrial_missing", 
                   "log_rev_missing", 
                   "log_expd_missing", 
                   "log_middle_school_missing"
                   
          ),
          omit.yes.no = c("No", "Yes"),
          add.lines = list(c("County FE?", "Yes", "Yes"),
                           c("Time FE?", "Yes", "Yes"),
                           c("Controls?", "No", "Yes")
          ),

          covariate.labels = c("Logged Land Auction last year",
                               "logged GDP per capita last year", 
                               "logged total GDP last year", 
                               "logged fixed asset investment last year",
                               "logged volume of loans last year", 
                               "logged number of hospitals last year", 
                               "logged amount of industrial output last year", 
                               "logged revenue last year", 
                               "logged expenditure last year", 
                               "logged number of secondary schools last year"),
          star.cutoffs = c(.10, .05, .01), 
          se = list(reg_results_land$se[,2], 
                    reg_results_con_land$se[,2]),
          p = list(reg_results_land$se[,4], 
                   reg_results_con_land$se[,4]),
          header = F, notes = "robust standard errors clustered by county in parentheses", 
          float = F)
sink()
